{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d9fe77410b4b4d28",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "source": [
    "# How to do experiment via Limit Distribution? (ABn Test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7f5730c16e20fe3e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:40:30.120314Z",
     "start_time": "2024-02-19T07:40:22.040562Z"
    }
   },
   "outputs": [],
   "source": [
    "from scipy.stats import bernoulli\n",
    "\n",
    "from lightautoml.addons.hypex.abn_test import min_sample_size\n",
    "from lightautoml.addons.hypex.abn_test import test_on_marginal_distribution\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f990ce477eadc58",
   "metadata": {},
   "source": [
    "### Initialize random state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ec6339fd4b9bfbe9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:50:19.576582Z",
     "start_time": "2024-02-19T07:50:19.565297Z"
    }
   },
   "outputs": [],
   "source": [
    "seed = 42  # You can choose any number as the seed\n",
    "random_state = np.random.RandomState(seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56a30e356af1c7ed",
   "metadata": {},
   "source": [
    "## Multiple testing for best sample selection"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a75d120b056c6172",
   "metadata": {},
   "source": [
    "### Number of samples and parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3cbf54ea3ce36deb",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:50:24.608488Z",
     "start_time": "2024-02-19T07:50:24.597353Z"
    }
   },
   "outputs": [],
   "source": [
    "num_samples = 10  # Number of samples\n",
    "minimum_detectable_effect = 0.05  # MDE\n",
    "assumed_conversion = 0.3  # Assumed conversion rate\n",
    "significance_level = 0.05  # Significance level\n",
    "power_level = 0.2  # Power level (1 - beta)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f519dc7b24067462",
   "metadata": {},
   "source": [
    "### Calculate the minimum sample size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "67b353030a5f7356",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:50:28.734673Z",
     "start_time": "2024-02-19T07:50:26.677119Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sample size = 1313\n"
     ]
    }
   ],
   "source": [
    "sample_size = min_sample_size(\n",
    "    num_samples,\n",
    "    minimum_detectable_effect,\n",
    "    variances=assumed_conversion * (1 - assumed_conversion),\n",
    "    significance_level=significance_level,\n",
    "    power_level=power_level,\n",
    "    equal_variance=True,\n",
    ")\n",
    "print(f\"Sample size = {sample_size}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ce92cbd46e2e66f",
   "metadata": {},
   "source": [
    "### Testing samples with equal conversion rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ac90451bc36c6044",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:51:05.235352Z",
     "start_time": "2024-02-19T07:50:32.706689Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Samples with equal conversion rate\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nSamples with equal conversion rate\")\n",
    "for _ in range(5):\n",
    "    samples = bernoulli.rvs(\n",
    "        assumed_conversion, size=[num_samples, sample_size], random_state=random_state\n",
    "    )\n",
    "    hypothesis = test_on_marginal_distribution(\n",
    "        samples, significance_level=significance_level\n",
    "    )\n",
    "    print(f\"\\tAccepted hypothesis H({hypothesis})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6cddf5b6661f161c",
   "metadata": {},
   "source": [
    "### Testing where the last sample has a higher conversion rate by MDE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6f61bafb73e0eaad",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:51:42.685919Z",
     "start_time": "2024-02-19T07:51:05.238615Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Last sample has higher conversion by MDE\n",
      "\tAccepted hypothesis H(10)\n",
      "\tAccepted hypothesis H(10)\n",
      "\tAccepted hypothesis H(10)\n",
      "\tAccepted hypothesis H(10)\n",
      "\tAccepted hypothesis H(10)\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nLast sample has higher conversion by MDE\")\n",
    "for _ in range(5):\n",
    "    samples = [\n",
    "        bernoulli.rvs(assumed_conversion, size=sample_size, random_state=random_state)\n",
    "        for _ in range(num_samples - 1)\n",
    "    ]\n",
    "    samples.append(\n",
    "        bernoulli.rvs(\n",
    "            assumed_conversion + minimum_detectable_effect,\n",
    "            size=sample_size,\n",
    "            random_state=random_state,\n",
    "        )\n",
    "    )\n",
    "    hypothesis = test_on_marginal_distribution(\n",
    "        samples, significance_level=significance_level\n",
    "    )\n",
    "    print(f\"\\tAccepted hypothesis H({hypothesis})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5cda8f5b9ac6f05c",
   "metadata": {},
   "source": [
    "## Multiple testing for best client income sample (conversion * price)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c248f764c4a24b8",
   "metadata": {},
   "source": [
    "### Parameters for different samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "14b5c6bdd7967b6f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:41:52.147726Z",
     "start_time": "2024-02-19T07:41:52.133629Z"
    }
   },
   "outputs": [],
   "source": [
    "num_samples = 5  # Number of samples\n",
    "minimum_detectable_effect = 2.5  # MDE\n",
    "prices = [100, 150, 150, 200, 250]  # Tariff prices\n",
    "conversions = [0.15, 0.1, 0.1, 0.075, 0.06]  # Tariff conversions\n",
    "significance_level = 0.05\n",
    "power_level = 0.2\n",
    "variances = [\n",
    "    price ** 2 * conversion * (1 - conversion)\n",
    "    for price, conversion in zip(prices, conversions)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "378d5887ef93b9e0",
   "metadata": {},
   "source": [
    "### Calculate minimum sample size for unequal variances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "13b3d7fa50a129a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:42:09.825024Z",
     "start_time": "2024-02-19T07:41:52.150025Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sample size = 7200\n"
     ]
    }
   ],
   "source": [
    "sample_size = min_sample_size(\n",
    "    num_samples,\n",
    "    minimum_detectable_effect,\n",
    "    variances=variances,\n",
    "    significance_level=significance_level,\n",
    "    power_level=power_level,\n",
    "    equal_variance=False,\n",
    ")\n",
    "print(f\"Sample size = {sample_size}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "43298f88aa5666a2",
   "metadata": {},
   "source": [
    "### Testing samples with equal ARPU (Average Revenue Per User)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "55423e5fcc7dd753",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:42:25.335995Z",
     "start_time": "2024-02-19T07:42:17.533344Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Samples with equal ARPU\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(4)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(0)\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nSamples with equal ARPU\")\n",
    "for _ in range(5):\n",
    "    samples = [\n",
    "        price * bernoulli.rvs(conversion, size=sample_size)\n",
    "        for price, conversion in zip(prices, conversions)\n",
    "    ]\n",
    "    hypothesis = test_on_marginal_distribution(\n",
    "        samples, significance_level=significance_level\n",
    "    )\n",
    "    print(f\"\\tAccepted hypothesis H({hypothesis})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7b9e1039797e997",
   "metadata": {},
   "source": [
    "### Testing where the last sample has higher ARPU by MDE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "initial_id",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-19T07:42:33.114314Z",
     "start_time": "2024-02-19T07:42:25.337517Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Last sample has higher ARPU by MDE\n",
      "\tAccepted hypothesis H(5)\n",
      "\tAccepted hypothesis H(5)\n",
      "\tAccepted hypothesis H(5)\n",
      "\tAccepted hypothesis H(0)\n",
      "\tAccepted hypothesis H(5)\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nLast sample has higher ARPU by MDE\")\n",
    "for _ in range(5):\n",
    "    samples = [\n",
    "        price * bernoulli.rvs(conversion, size=sample_size)\n",
    "        for price, conversion in zip(prices, conversions[:-1])\n",
    "    ]\n",
    "    samples.append(\n",
    "        prices[-1]\n",
    "        * bernoulli.rvs(\n",
    "            conversions[-1] + minimum_detectable_effect / prices[-1], size=sample_size\n",
    "        )\n",
    "    )\n",
    "    hypothesis = test_on_marginal_distribution(\n",
    "        samples, significance_level=significance_level\n",
    "    )\n",
    "    print(f\"\\tAccepted hypothesis H({hypothesis})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d54ff8d1-c226-421f-bd86-914851dab222",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
